其他
CopyKat——基于高通量单细胞测序方法鉴定肿瘤细胞拷贝数变异和亚克隆结构
关注生信阿拉丁,学习不打烊~
今天来看CopyKat
基于高通量单细胞测序方法
鉴定肿瘤细胞拷贝数变异和亚克隆结构背 景
CopyKat 原 理 和 流 程
将单细胞转录组的基因表达矩阵作为输入,依据基因组坐标对基因进行注释、排序;利用Freeman–Tukey transformation来稳定方差;使用polynomial dynamic linear modeling (DLM)矫正矩阵中异常值。
为了获得高置信度的二倍体细胞(正常细胞),先将细胞分成几个小的不同层次的亚群中,并利用 Gaussian mixture model (GMM)估计每个层次亚群的方差,具有最小估计方差的亚群被认为是高置信度的二倍体细胞。
为了检测染色体断点,用Poisson-gamma模型和Markov chain Monte Carlo (MCMC)迭代计算每个基因窗口的后验均值,并用Kolmogorov–Smirnov(KS)检验将没有显著性差异的window合并。另,为了加速计算,先将细胞分成不同群,检测共同的染色体断点并合并形成细胞群的基因组断点。
通过对单细胞拷贝数矩阵聚类,将细胞分成的不同亚群,并计算非整倍体细胞(肿瘤细胞)和二倍体基质细胞(正常细胞)之间的最大距离,对于距离不显著的需要使用GMM模型预测单个肿瘤细胞。
将单细胞拷贝数数据聚类以鉴定克隆亚群,可进一步分析不同肿瘤亚群的基因表达差异。
CopyKat 使 用
library(devtools)
install_github("navinlabcode/copykat")
library(Seurat)
raw <- Read10X(data.dir = data.path.to.cellranger.outs)
raw <- CreateSeuratObject(counts = raw, project = "copycat.test", min.cells = 0, min.features = 0)
exp.rawdata <- as.matrix(raw@assays$RNA@counts)
write.table(exp.rawdata, file="exp.rawdata.txt", sep="\t", quote = FALSE, row.names = TRUE)
immune.combined<-readRDS("immune.combined_umap.rds")
counts_matrix = as.matrix(immune.combined@assays$RNA@counts)
write.table(counts_matrix,file="counts_matrix_new.txt",col.names=T,row.names=T,sep="\t",quote=F)
export R_LIBS="/DL_and_ML/library/" && ymcR
library(copykat)
copykat.test <- copykat(rawmat=counts_matrix, id.type="S", cell.line="no", ngene.chr=5, win.size=25, KS.cut=0.2, sam.name="test", distance="euclidean", n.cores=10) ##运行速度1675个细胞六分钟左右
pred.test <- data.frame(copykat.test$prediction)
head(pred.test)
CNA.test <- data.frame(copykat.test$CNAmat)
head(CNA.test[ , 1:5])
#Copykat可以生成用于衡量CNV水平的热图,行为细胞,列为基因组位置
my_palette <- colorRampPalette(rev(RColorBrewer::brewer.pal(n = 3, name = "RdBu")))(n = 999)
chr <- as.numeric(CNA.test$chrom) %% 2+1
rbPal1 <- colorRampPalette(c('grey60','grey30'))
CHR <- rbPal1(2)[as.numeric(chr)]
chr1 <- cbind(CHR,CHR)
rbPal5 <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = "Dark2")[2:1])
com.preN <- pred.test$copykat.pred
pred <- rbPal5(2)[as.numeric(factor(com.preN))]
cells <- rbind(pred,pred)
col_breaks=c(seq(-1,-0.4,length=50),seq(-0.4,-0.2,length=150),seq(-0.2,0.2,length=600),seq(0.2,0.4,length=150),seq(0.4, 1,length=50))
pdf("CNV_heatmap.pdf",8,10)
heatmap.3(t(CNA.test[,4:ncol(CNA.test)]),dendrogram="r", distfun = function(x) parallelDist::parDist(x,threads =4, method = "euclidean"), hclustfun = function(x) hclust(x, method="ward.D2"), ColSideColors=chr1,RowSideColors=cells,Colv=NA, Rowv=TRUE,
notecol="black",col=my_palette,breaks=col_breaks, key=TRUE,keysize=1, density.info="none", trace="none",
cexRow=0.1,cexCol=0.1,cex.main=1,cex.lab=0.1, symm=F,symkey=F,symbreaks=T,cex=1, cex.main=4, margins=c(10,10))
legend("topright", paste("pred.",names(table(com.preN)),sep=""), pch=15,col=RColorBrewer::brewer.pal(n = 8, name = "Dark2")[2:1], cex=0.6, bty="n")
dev.off()
tumor.cells <- pred.test$cell.names[which(pred.test$copykat.pred=="aneuploid")]
tumor.mat <- CNA.test[, which(colnames(CNA.test) %in% tumor.cells)]
hcc <- hclust(parallelDist::parDist(t(tumor.mat),threads =4, method = "euclidean"), method = "ward.D2")
hc.umap <- cutree(hcc,2)
rbPal6 <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = "Dark2")[3:4])
subpop <- rbPal6(2)[as.numeric(factor(hc.umap))]
cells <- rbind(subpop,subpop)
pdf("Subpopulation_CNV_heatmap.pdf",8,10)
heatmap.3(t(tumor.mat),dendrogram="r", distfun = function(x) parallelDist::parDist(x,threads =4, method = "euclidean"), hclustfun = function(x) hclust(x, method="ward.D2"), ColSideColors=chr1,RowSideColors=cells,Colv=NA, Rowv=TRUE,notecol="black",col=my_palette,breaks=col_breaks, key=TRUE, keysize=1, density.info="none", trace="none", cexRow=0.1,cexCol=0.1,cex.main=1,cex.lab=0.1, symm=F,symkey=F,symbreaks=T,cex=1, cex.main=4, margins=c(10,10))
legend("topright", c("c1","c2"), pch=15,col=RColorBrewer::brewer.pal(n = 8, name = "Dark2")[3:4], cex=0.9, bty='n')
dev.off()
rds <- immune.combined
rds <- NormalizeData(rds,verbose=F)
rds <- ScaleData(rds,verbose=F)
rds <- FindVariableFeatures(rds,verbose=F)
rds <- RunPCA(rds, npcs = 30, verbose = F)
rds_umap <- RunUMAP(rds, reduction = "pca",dims = 1:30)
rds_umap <- FindNeighbors(object = rds_umap , reduction = "pca", dims = 1:30)
rds_umap <- FindClusters(rds_umap , resolution = 0.5)
rownames(rds_umap@meta.data)[!rownames(rds_umap@meta.data) %in% pred.test$cell.names]
\#[1] "GTGTGATAGGCCATAG-1" "GCTTGGGAGGCTAACG-1" "GTACAGTAGTCCTGCG-1"
omit_cell <- data.frame(cell.names=c("GTGTGATAGGCCATAG-1","GCTTGGGAGGCTAACG-1","GTACAGTAGTCCTGCG-1"),copykat.pred=rep("unknown",3))
rownames(omit_cell)<-c("GTGTGATAGGCCATAG-1","GCTTGGGAGGCTAACG-1","GTACAGTAGTCCTGCG-1")
pred.test1 <-rbind(pred.test,omit_cell)
pred.test1$cell.names<-gsub('-','.',pred.test1$cell.names)
rownames(pred.test1)<-gsub('-','.',rownames(pred.test1))
rds_umap@meta.data$cell.names0<-rownames(rds_umap@meta.data)
rds_umap@meta.data$cell.names<-gsub('-','.',rownames(rds_umap@meta.data))
rds_umap@meta.data<-merge(rds_umap@meta.data,pred.test1,by="cell.names")
rownames(rds_umap@meta.data)<-rds_umap@meta.data$cell.names0
rds_umap@meta.data$tumor_subpopulation <- "NA"
normal.cells <- gsub('-','.',pred.test$cell.names)[which(pred.test$copykat.pred=="diploid")]
rds_umap@meta.data$tumor_subpopulation[rds_umap@meta.data$cell.names %in% normal.cells] <- "normal"
rds_umap@meta.data$tumor_subpopulation[rds_umap@meta.data$cell.names %in% names(hc.umap[hc.umap==1])] <- "tumor_subpopulation1"
rds_umap@meta.data$tumor_subpopulation[rds_umap@meta.data$cell.names %in% names(hc.umap[hc.umap==2])] <- "tumor_subpopulation2"
color_all <-c("#4DBBD5FF","#E64B35FF","#00A087FF","#3C5488FF","#F39B7FFF","#8491B4FF","#91D1C2FF","#DC0000FF","#7E6148FF","#B09C85FF","#3B4992FF","#EE0000FF","#008B45FF")
length_group <-length(unique(rds_umap@meta.data$seurat_clusters))
length_pred <- length(unique(rds_umap@meta.data$copykat.pred))
length_subpopulation<- length(unique(rds_umap@meta.data$tumor_subpopulation))
p0<-theme(panel.grid=element_blank(), legend.background = element_rect(colour = NA),
legend.title = element_blank(),legend.text = element_text(face="plain", color="black",size=20),
axis.text.x = element_text(color="black",size=20),
axis.text.y = element_text(color="black",size=20),
axis.title.x = element_text(face="plain", color="black",size=20),
axis.title.y = element_text(face="plain", color="black",size=20))
pdf("umap_cluster_samples.pdf",20,12)
p1 <- DimPlot(rds_umap, reduction = "umap", cols=color_all[1:length_group],label = TRUE,label.size = 8,pt.size = 1)+p0
p2 <- DimPlot(rds_umap, reduction = "umap", cols=color_all[1:length_pred],group.by = "copykat.pred",pt.size = 1)+p0
p3 <- DimPlot(rds_umap, reduction = "umap", cols=color_all[1:length_subpopulation],group.by = "tumor_subpopulation",pt.size = 1)+p0
p4<-plot_grid(p1,p2,p3,ncol=3)
print(p4)
dev.off()
总 结
参 考
往 期 回 顾